from sklearn.datasets import load_iris
from sklearn.tree import DecisionTreeClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import train_test_split
from sklearn import metrics
import mlflow
import mlflow.sklearn
print("done")

# Load the iris dataset once; later cells reuse `data`, `X`, `Y` and the split.
data = load_iris()

# Notebook-style inspection expressions: their values are discarded when this
# file is executed as a plain script.
type(data)
data.target
data.feature_names

# Feature matrix and class labels.
X, Y = data.data, data.target
type(X)

# Hold out 25% of the samples for evaluation; the fixed seed keeps the split
# reproducible between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    Y,
    test_size=0.25,
    random_state=0,
)
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
from sklearn.decomposition import PCA
# From here on X holds only the first two feature columns (used for the 2D
# scatter plot); the train/test split computed earlier is not affected.
X = data.data[:, :2]
Y = data.target

# Per-column extremes padded by 0.5 on each side: column 0 gives the x range,
# column 1 gives the y range.
x_min, y_min = X.min(axis=0) - 0.5
x_max, y_max = X.max(axis=0) + 0.5
import os
import shutil

# Start from an empty output directory (presumably so that re-running the
# notebook does not collide with previously saved models — verify).
# This replaces the IPython-only `!rm -r` shell escape with plain Python, and
# recreates the directory afterwards because the figure saved later is written
# into it and matplotlib does not create missing parent directories.
iris_output_dir = "/dbfs/ci_challenge/mlflow/iris"
shutil.rmtree(iris_output_dir, ignore_errors=True)  # a missing directory is fine
os.makedirs(iris_output_dir, exist_ok=True)
# 2D scatter of the first two columns of X, coloured by class label.
plt.figure(2, figsize=(8, 6))
plt.clf()

# Plot the points in X
plt.scatter(X[:, 0], X[:, 1], c=Y, cmap=plt.cm.Set1, edgecolor='k')
plt.xlabel('Sepal length')
plt.ylabel('Sepal width')

# An empty tuple removes the tick marks on both axes.
# (Fixed: the original called the non-existent plt.xtricks / plt.ytricks.)
plt.xticks(())
plt.yticks(())
import os

# 3D scatter of the full iris feature matrix projected onto its first three
# principal components, saved as a PNG.
fig = plt.figure(1, figsize=(8, 6))

# Create the 3D axes through the figure so they are actually attached to it;
# recent matplotlib releases no longer auto-register `Axes3D(fig, ...)`.
ax = fig.add_subplot(111, projection="3d", elev=-150, azim=110)

X_reduced = PCA(n_components=3).fit_transform(data.data)
ax.scatter(
    X_reduced[:, 0],
    X_reduced[:, 1],
    X_reduced[:, 2],
    c=Y,
    cmap=plt.cm.Set1,
    edgecolor='k',
    s=40,
)
ax.set_title("First 3 dimenional PCA directions")

# One label per axis, with the tick labels hidden.
ax.set_xlabel("1st eigenvectors")
ax.xaxis.set_ticklabels([])
ax.set_ylabel("2nd eigenvectors")
ax.yaxis.set_ticklabels([])
ax.set_zlabel("3rd eigenvectors")
ax.zaxis.set_ticklabels([])

# savefig does not create missing parent directories, so make sure the target
# directory exists before writing.
figure_path = "/dbfs/ci_challenge/mlflow/iris/iris.png"
os.makedirs(os.path.dirname(figure_path), exist_ok=True)
fig.savefig(figure_path)
plt.close(fig)

# NOTE(review): `display` is not defined or imported in this file — presumably
# the notebook environment's built-in; confirm it is available where this runs.
display()
# MLflow run 1: decision tree with no max_depth set, evaluated on the held-out split.
with mlflow.start_run():
    dtc = DecisionTreeClassifier(random_state=10).fit(X_train, y_train)
    y_pred_class = dtc.predict(X_test)
    accuracy = metrics.accuracy_score(y_test, y_pred_class)
    print(accuracy)

    # Record the hyper-parameter and the resulting metric as key/value pairs
    # on the active run.
    mlflow.log_param("random_state", dtc.random_state)
    mlflow.log_metric("accuracy", accuracy)

    # Attach the fitted model to the run under the artifact path "model".
    mlflow.sklearn.log_model(dtc, "model")

    # Additionally save a copy of the model at a fixed path. Note that "%f"
    # renders the integer suffix as "1.000000".
    modelpath = "/dbfs/ci_challenge/mlflow/iris/model-%s-%f" % ("decsion_tree", 1)
    mlflow.sklearn.save_model(dtc, modelpath)
    # Disabled: attach the saved figure to the run.
    # mlflow.log_artifact("/dbfs/ci_challenge/mlflow/iris/iris.png")
# MLflow run 2: decision tree restricted to max_depth=1, evaluated on the
# held-out split.
with mlflow.start_run():
    dtc = DecisionTreeClassifier(max_depth=1, random_state=10)
    dtc.fit(X_train, y_train)
    y_pred_class = dtc.predict(X_test)
    accuracy = metrics.accuracy_score(y_test, y_pred_class)
    print(accuracy)

    # Log every hyper-parameter that was set explicitly. max_depth was
    # previously missing, which left this run without a record of the one
    # setting that distinguishes it.
    mlflow.log_param("max_depth", 1)
    mlflow.log_param("random_state", 10)
    mlflow.log_metric("accuracy", accuracy)

    # Attach the fitted model to the run under the artifact path "model".
    mlflow.sklearn.log_model(dtc, "model")

    # Additionally save a copy of the model at a fixed path ("%f" renders the
    # integer suffix as "2.000000").
    modelpath = "/dbfs/ci_challenge/mlflow/iris/model-%s-%f" % ("decsion_tree", 2)
    mlflow.sklearn.save_model(dtc, modelpath)
    # Disabled: attach a figure to the run.
    # mlflow.log_artifact("iris1.png")
# Numeric suffix for the saved KNN model path; bumped at the end of this block.
i = 3

# MLflow run 3: k-nearest-neighbours with k=5, evaluated on the held-out split.
with mlflow.start_run():
    knn = KNeighborsClassifier(n_neighbors=5)
    knn.fit(X_train, y_train)
    y_pred_class = knn.predict(X_test)
    accuracy = metrics.accuracy_score(y_test, y_pred_class)
    print(accuracy)

    mlflow.log_param("n_neighbors", 5)
    mlflow.log_metric("accuracy", accuracy)

    # Log the KNN model trained in this run. The original logged `dtc`, the
    # decision tree left over from an earlier run, so the run's model artifact
    # did not match its parameters and metric.
    mlflow.sklearn.log_model(knn, "model")

    # Additionally save a copy of the model at a fixed path ("%f" renders the
    # integer suffix with six decimals, e.g. "3.000000").
    modelpath = "/dbfs/ci_challenge/mlflow/iris/model-%s-%f" % ("knn", i)
    mlflow.sklearn.save_model(knn, modelpath)
    # Disabled: attach a figure to the run.
    # mlflow.log_artifact("iris1.png")

i = i + 1
# MLflow run 4: k-nearest-neighbours with k=2, evaluated on the held-out split.
# Relies on the counter `i` already being set by the preceding KNN run.
with mlflow.start_run():
    knn = KNeighborsClassifier(n_neighbors=2)
    knn.fit(X_train, y_train)
    y_pred_class = knn.predict(X_test)
    accuracy = metrics.accuracy_score(y_test, y_pred_class)
    print(accuracy)

    mlflow.log_param("n_neighbors", 2)
    mlflow.log_metric("accuracy", accuracy)

    # Log the KNN model trained in this run. The original logged `dtc`, the
    # decision tree left over from an earlier run, so the run's model artifact
    # did not match its parameters and metric.
    mlflow.sklearn.log_model(knn, "model")

    # Additionally save a copy of the model at a fixed path ("%f" renders the
    # integer suffix with six decimals).
    modelpath = "/dbfs/ci_challenge/mlflow/iris/model-%s-%f" % ("knn", i)
    mlflow.sklearn.save_model(knn, modelpath)
    # Disabled: attach a figure to the run.
    # mlflow.log_artifact("iris1.png")

i = i + 1
%sh
# Debug cell: change into /dbfs/ and list its entries.
cd /dbfs/
ls
%sh
# Debug cell: print the shell's current working directory and list its entries.
pwd
ls